tests/testthat/profiling for y_bootstrap_2_schrittig.R

load("C:/Users/nikos/Desktop/SAE_proj_data.RData")

library(microbenchmark)
library(parallel)
library(mvtnorm)

# --------------------------------------------------------------------------------------------#
# ------------------------------- calculating the sd of y ------------------------------------#
# --------------------------------------------------------------------------------------------#

# Alternative 0 ###############################################################################
# loop: for every predicted y_hat, calculate std of y_hat and put it in a vector
varsofy0 <- function(n_obs_census1, cov_coefficients1, x_census1){
  var_y0 <- rep(NA, n_obs_census1)
  for(i in 1:n_obs){
    var_y0[i] <- t(x_census1[i,]) %*% cov_coefficients1 %*% x_census[i,]
  }
  return(var_y0)
}

# Alternative 1 ###############################################################################
# loop, but matrix is transposed outside the loop
varsofy1 <- function(n_obs_census1, cov_coefficients1, x_census1){
  tx_census1 <- t(x_census1)
  var_y1 <- rep(NA, n_obs_census1)
  for(i in 1:n_obs){
    var_y1[i] <- tx_census1[,i] %*% cov_coefficients1 %*% x_census[i,]
  }
  return(var_y1)
}

# Alternative 2 ###############################################################################
# with apply function
varsofy2 <- function(n_obs_census1, cov_coefficients1, x_census1){
  var_y2 <- rep(NA, n_obs_census1)
  var_y2 <- apply(X = x_census1, MARGIN = 1, FUN = function(x){t(x) %*% cov_coefficients1 %*% x})
  return(var_y2)
}

# Alternative 3 ###############################################################################
# with apply and parallelisation
varsofy3 <- function(n_obs_census1, cov_coefficients1, x_census1){
  var_y3 <- rep(NA, n_obs_census1)
  number_of_cores <- parallel::detectCores() - 1 # calculate number of cores to use
  cl <- makeCluster(number_of_cores) # initiate clusters for parallelisation
  parallel::clusterExport(cl, c("var_y3", "x_census", "cov_coefficients"))
  a <- Sys.time()
  var_y3 <- parallel::parApply(cl, X = x_census, MARGIN = 1, FUN = function(x){t(x) %*% cov_coefficients %*% x})
  b <- Sys.time()
  parallel::stopCluster(cl)
  return(a-b)
}
varsofy3(n_obs_census1 = n_obs_census, cov_coefficients1 = cov_coefficients, x_census1 = x_census)
# nicht mal die alleinige Anwendung der Parallelisierung scheint einen sinnvollen Unterschied zu machen


# Alternative 4 ###############################################################################
# with C++



mbm <- microbenchmark::microbenchmark(
  alt1 = varsofy1(n_obs_census1 = n_obs_census, cov_coefficients1 = cov_coefficients, x_census1 = x_census),
  alt2 = varsofy2(n_obs_census1 = n_obs_census, cov_coefficients1 = cov_coefficients, x_census1 = x_census),
  alt3 = varsofy3(n_obs_census1 = n_obs_census, cov_coefficients1 = cov_coefficients, x_census1 = x_census),
  times = 1)

mbm


mbm <- microbenchmark::microbenchmark(
  alt0 = varsofy0(n_obs_census1 = n_obs_census, cov_coefficients1 = cov_coefficients, x_census1 = x_census),
  alt1 = varsofy1(n_obs_census1 = n_obs_census, cov_coefficients1 = cov_coefficients, x_census1 = x_census),
  times = 3)

mbm






# --------------------------------------------------------------------------------------------#
# -------------------------- drawing the normal distr. sample --------------------------------#
# --------------------------------------------------------------------------------------------#

xbeta <- predict(inference_survey$model_fit_surv, newdata = censusdata)


# Alternative 1 ###############################################################################
# using univariate normal distribution and loop
y_random1 <- function(n_obs_census1, n_boot1 = 5, xbeta1, sd_y1){
  y_bootstrap <- matrix(NA, nrow = n_obs_census1, ncol = n_boot1)
  for (i in 1:n_obs_census1){
    y_bootstrap[i,] <- rnorm(n = n_boot1, mean = xbeta1[i], sd = sd_y1[i])
  }
}

# Alternative 2 ###############################################################################
# using univariate normal distribution and apply
y_random2 <- function(n_obs_census1, n_boot1 = 5, xbeta1, sd_y1){
  y_bootstrap <- matrix(NA, nrow = n_obs_census1, ncol = n_boot1)
  m <- cbind(xbeta1, sd_y1)
  y_bootstrap <- apply(X = m, MARGIN = 1,
                       FUN = function(x, n_boot2){rnorm(n = n_boot1, mean = x[1], sd = x[2])},
                       n_boot2 = n_boot1)
}
# all(y_random1(n_obs_census1 = n_obs_census, n_boot1 = 100, xbeta1 = xbeta, sd_y1 = sd_y) == y_random2(n_obs_census1 = n_obs_census, n_boot1 = 100, xbeta1 = xbeta, sd_y1 = sd_y))
# Ergebnis ist gleich. Microbenchmarking: Alt1 leicht besser als Alt2


# Alternative 3 ###############################################################################
# using multivariate normal distribution and loop
y_random1 <- function(n_obs_census1, n_boot1 = 5, xbeta1, sd_y1){
  y_bootstrap <- matrix(NA, nrow = n_obs_census1, ncol = n_boot1)
  for (i in 1:n_obs_census1){
    y_bootstrap[i,] <- rnorm(n = n_boot1, mean = xbeta1[i], sd = sd_y1[i])
  }
}



mbm <- microbenchmark::microbenchmark(
  alt1 = y_random1(n_obs_census1 = n_obs_census, n_boot1 = 100, xbeta1 = xbeta, sd_y1 = sd_y),
  alt2 = y_random2(n_obs_census1 = n_obs_census, n_boot1 = 100, xbeta1 = xbeta, sd_y1 = sd_y),
  times = 1)

mbm
nikosbosse/SAE documentation built on May 12, 2019, 4:37 a.m.